* This do-file creates balance tables of statistics for baseline characteristics
* Paper Tables: 1, 2, S3
* NOTE: Output needs a little love to look pretty


*****
* Table 1 and S3 - Balance in Input Experiment
*****

* Scratch file that accumulates the household-level balance data
tempfile balance

use "$out_data/survey_roster.dta", clear

*** Baseline farmer characteristics: 0/1 indicators built from coded variables

* caste codes 2 and 3 map to 1; codes 1 and 4 map to 0
recode caste (2 3 = 1) (1 4 = 0), gen(caste_scst)

* Education indicators for the m_ and f_ education variables.
*   primary:   code 1 -> 0, codes 2-7 -> 1
*   secondary: codes 1-2 -> 0, codes 3-7 -> 1
* Loop order (level outside, m/f inside) keeps the original variable order.
local rule_primary   "(1 = 0) (2/7 = 1)"
local rule_secondary "(1/2 = 0) (3/7 = 1)"
foreach lvl in primary secondary {
	foreach who in m f {
		recode `who'_education `rule_`lvl'', gen(`who'_educ_`lvl')
	}
}

save `balance', replace

*** Kharif 2017 AFTER RANDOMIZATION Total Area Planted (Not just pulses, Slow-moving)
use hhid plotno status_kharif plot_area_acre irrigation_source ///
	using "$k1/Farm/plots.dta", clear

* Restrict to farmed plots (owned, rented, or sharecropped)
keep if inlist(status_kharif, 1, 4, 5)

* Plot-level irrigation flags under two definitions:
*   irrr = irrigation_source is anything other than "0"
*   irri = irrigation_source is anything other than "0" or "1"
* NOTE(review): an empty irrigation_source counts as irrigated under both
* definitions -- confirm that is intended.
gen byte irrr = irrigation_source != "0"
gen byte irri = !inlist(irrigation_source, "0", "1")

* Irrigated acreage per plot (zero when the flag is off)
gen k17_plot_irri_acre = plot_area_acre*irri
gen k17_plot_irrr_acre = plot_area_acre*irrr

* Household totals: all farmed acreage plus both irrigated-acreage measures
collapse (sum) k17_plot_area_acre=plot_area_acre ///
	k17_plot_irri_acre k17_plot_irrr_acre, by(hhid) fast

// Winsorize plot area (upper tail only), then express each irrigated measure
// as a fraction of winsorized total area
winsor k17_plot_area_acre, gen(k17_plot_area_acre_win5) highonly p(.05)
foreach m in irrr irri {
	winsor k17_plot_`m'_acre, gen(k17_plot_`m'_acre_win5) highonly p(.05)
	gen k17_acre_`m'_frac = k17_plot_`m'_acre_win5 / k17_plot_area_acre_win5
}

* Attach the roster data; every plot household must appear in the roster
merge 1:1 hhid using `balance', assert(match using)

* Roster-only households get zero farmed area	// 37 have no own-farmed plots
* NOTE(review): only k17_plot_area* is zero-filled; the irrigated-area and
* fraction variables stay missing for these households.
foreach v of varlist k17_plot_area* {
	replace `v' = 0 if `v' == .
}
drop _merge

save `balance', replace
save "$out_data/balance_baseline.dta", replace

*****
* Make Balance Tables
*****

* Balance variables: respondent-level (m_ block, then f_ block) and household-level
local bal_resp
foreach who in m f {
	local bal_resp `bal_resp' `who'_gender `who'_age `who'_educ_p `who'_educ_s
}
local bal_hh hh_size caste_scst bl_pulse_prev bl_wealth_win5 bl_land_win5

use "$out_data/balance_baseline.dta", clear

* Bring in village-level treatment assignment; every household's village must
* be in the assignment file. Villages with no households, and households
* without an input-treatment assignment, are removed.
merge m:1 village using "$admin_data/treatment_assignment.dta", ///
	keepusing(treat_input treat_ext) assert(match using)
drop if _merge==2 | missing(treat_input)
drop _merge

* Survey-drop flags; the two files must contain exactly the same households
merge 1:1 hhid using "$out_data/survey_drops.dta", ///
	keepusing(drop_k2 drop_k3 drop_srvy drop_ever) assert(match) nogenerate

* Attrition category = drop_srvy + drop_ever
label define drops 0 "All Surveys" 1 "Drop Phone" 2 "Drop Early"
gen byte drop_status = drop_srvy + drop_ever
label values drop_status drops

* Row titles for the balance tables.
* The m_ and f_ respondent variables share the same four labels.
foreach who in m f {
	label variable `who'_gender "Male"
	label variable `who'_age "Age"
	label variable `who'_educ_p "Primary School"
	label variable `who'_educ_s "Secondary School"
}

* Household-level variables
label variable hh_size "HH Size"
label variable caste_scst "SC/ST"
label variable bl_pulse_prev "Past Pulses"
label variable bl_wealth_win5 "Asset Index"
label variable bl_land_win5 "Land Owned"
label variable k17_plot_area_acre_win5 "Land Farmed"

* Replace missing values with group means for iebaltab
* Replicates deprecated balmiss(groupmean)
*
* For each balance variable and each input-treatment arm (0/1), the mean is
* computed over households with drop_k2==0 & drop_k3==0 and then assigned to
* every household in that arm whose value is missing.
*
* r(mean) is referenced directly rather than through macro expansion:
*   - the macro form passes a rounded decimal string instead of the stored
*     double-precision result;
*   - if an arm has no non-missing observations, the macro expands to nothing
*     and the replace is a syntax error, whereas r(mean) evaluates to missing
*     and the replace leaves the values missing.
*
* NOTE(review): the imputation groups are always the treat_input arms, but the
* attrition table below groups by drop_status -- confirm that is intended.
foreach vbl of varlist `bal_resp' `bal_hh' {
	forvalues t = 0/1 {
		quietly summarize `vbl' if drop_k2==0 & drop_k3==0 & treat_input==`t'
		replace `vbl' = r(mean) if missing(`vbl') & treat_input==`t'
	}
}

*** Table 1: balance across input-treatment arms,
*** restricted to households with drop_k2==0 and drop_k3==0
local t1_sample drop_k2==0 & drop_k3==0
iebaltab `bal_resp' `bal_hh' if `t1_sample', ///
	grpvar(treat_input) total onerow grpc rowv ///
	savetex("$out_tables/Table_1_ugly.tex") replace

*** Table S3 (appendix): the same variables compared across attrition status,
*** with drop_status==0 as the control group; no sample restriction
iebaltab `bal_resp' `bal_hh', ///
	grpvar(drop_status) control(0) onerow grpc rowv ///
	savetex("$out_tables/Table_S3_ugly.tex") replace

*****
* Table 2 - Balance in Output Experiment
*****

* File of best guesses for village match with SHRUG
use "$shrug/vilid_shrid.dta", clear

* A 1:1 merge needs a unique key, so rows with no shrid2 get a placeholder key
* (their row number) for the duration of the merge. The placeholder rows are
* flagged explicitly so they can be blanked again afterwards whatever the
* number of digits in the row number.
tempvar no_shrid
gen byte `no_shrid' = missing(shrid2)
replace shrid2 = string(_n) if `no_shrid'

* Keep every village from the master file, matched to SECC or not
merge 1:1 shrid2 using "$shrug/secc_shrid_bihar.dta", keep(match master)

* Restore the empty key on placeholder rows. The original length rule
* (fewer than 5 characters) is kept as well so any other short key is
* still blanked as before.
replace shrid2 = "" if `no_shrid' | strlen(shrid2)<5
drop `no_shrid'

/* The following don't match to SECC
	di	blo vil	s	d	sd		tv_id	tr	shrid2					name
	4	8	231	10	218	1247	232771	0	11-10-218-01247-232771 	chandpur
	3	5	190	10	219	1255	233470	0	11-10-219-01255-233470 	fatehpur
	3	5	191	10	219	1255	233471	1	11-10-219-01255-233471 	mehandiganj
*/


* Select items for balance table

* matched: starts as the mean of _merge within each treat_price group. The
* merge above used keep(match master), so _merge is 1 (master only) or 3
* (matched), and (mean - 1)/2 rescales that mean to the share of rows in the
* group with _merge==3. The value is then blanked on unmatched rows.
egen matched = mean(_merge), by(treat_price)
	replace matched = (matched - 1)/2
	replace matched = . if _merge==1
* scst: sum of the sc_share and st_share variables
gen scst = sc_share + st_share
* hh_size: tot_p divided by secc_hh
gen hh_size = tot_p / secc_hh
* farmland: row total of every variable matching *_acre_mean, then divided by
* land_own_share
egen farmland = rowtotal(*_acre_mean)
	replace farmland = farmland / land_own_share	// conditional on owning >0

* Table 2 row variables: household block first, then agriculture block.
* (bal_hh is redefined here; it previously held the Table 1 household list.)
local bal_ag land_own_share farmland irr_share1 nco2d_cultiv_share inc_source_cultiv_share
local bal_hh secc_hh hh_size scst ed_prim_share ed_sec_share roof_mat_solid_share

* Row titles
label variable matched "Match to SECC"

label variable secc_hh "Num. HHs"
label variable hh_size "HH Size"
label variable scst "SC/ST"
label variable ed_prim_share "Primary School"
label variable ed_sec_share "Secondary School"
label variable roof_mat_solid_share "Solid Roof"

label variable land_own_share "Frac. Landowners"
label variable farmland "Land Owned"
label variable irr_share1 "Share Irrigated"
label variable nco2d_cultiv_share "Ag. Empl. Share"
label variable inc_source_cultiv_share "Ag. Primary Income"

* Balance across treat_price groups, with group 0 as control and an F-test
iebaltab `bal_hh' `bal_ag', ///
	grpvar(treat_price) control(0) ftest onerow grpc rowv ///
	savetex("$out_tables/Table_2_ugly.tex") replace
